Abstract
Background We have run the simplified Naomi model using a range of inference methods.
Task In this report, we compare the accuracy of the posterior distributions obtained from these inference methods.
We compare the inference results from TMB, aghq and tmbstan.
Import these inference results as follows:
tmb <- readRDS("depends/tmb.rds")
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.4.1
## Current Matrix version is 1.5.1
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
aghq <- readRDS("depends/aghq.rds")
tmbstan <- readRDS("depends/tmbstan.rds")
For more information about the conditions under which these results were generated, see:
depends <- yaml::read_yaml("orderly.yml")$depends
for(i in seq_along(depends)) {
report_name <- names(depends[[i]])
print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
print(orderly::orderly_info(report_id, report_name)$parameters)
}
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230119-142551-23704e7b and was run with the following parameters:"
## $tmb
## [1] TRUE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $ndConstruction
## [1] "product"
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE && parameter:k == 1)"
## [1] "Obtained report had ID 20230119-170459-9b07786e and was run with the following parameters:"
## $aghq
## [1] TRUE
##
## $k
## [1] 1
##
## $ndConstruction
## [1] "product"
##
## $tmb
## [1] FALSE
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
##
## $niter
## [1] 20000
##
## $nthin
## [1] 20
##
## $tmb
## [1] FALSE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $nsample
## [1] 1000
All of the possible parameter names are as follows:
unique(names(tmb$fit$obj$env$par))
## [1] "beta_rho" "beta_alpha" "beta_lambda" "beta_anc_rho" "beta_anc_alpha" "logit_phi_rho_x" "log_sigma_rho_x"
## [8] "logit_phi_rho_xs" "log_sigma_rho_xs" "logit_phi_rho_a" "log_sigma_rho_a" "logit_phi_rho_as" "log_sigma_rho_as" "log_sigma_rho_xa"
## [15] "u_rho_x" "us_rho_x" "u_rho_xs" "us_rho_xs" "u_rho_a" "u_rho_as" "logit_phi_alpha_x"
## [22] "log_sigma_alpha_x" "logit_phi_alpha_xs" "log_sigma_alpha_xs" "logit_phi_alpha_a" "log_sigma_alpha_a" "logit_phi_alpha_as" "log_sigma_alpha_as"
## [29] "log_sigma_alpha_xa" "u_alpha_x" "us_alpha_x" "u_alpha_xs" "us_alpha_xs" "u_alpha_a" "u_alpha_as"
## [36] "u_alpha_xa" "OmegaT_raw" "log_betaT" "log_sigma_lambda_x" "ui_lambda_x" "log_sigma_ancrho_x" "log_sigma_ancalpha_x"
## [43] "ui_anc_rho_x" "ui_anc_alpha_x" "log_sigma_or_gamma" "log_or_gamma"
data.frame(
"TMB" = tmb$time,
"aghq" = aghq$time,
"tmbstan" = tmbstan$time
)
histogram_plot("beta_rho", i = 1)
histogram_plot("beta_anc_rho")
ks_helper <- function(par) to_ks_df(par) %>% ks_plot(par = par)
ks_helper("beta")
ks_helper("logit")
ks_helper("log_sigma")
ks_helper("u_rho_x")
ks_helper("u_rho_xs")
ks_helper("us_rho_x")
ks_helper("us_rho_xs")
ks_helper("u_rho_a")
ks_helper("u_rho_as")
ks_helper("u_alpha_x")
ks_helper("u_alpha_xs")
ks_helper("us_alpha_x")
ks_helper("us_alpha_xs")
ks_helper("u_alpha_a")
ks_helper("u_alpha_as")
ks_helper("u_alpha_xa")
ks_helper("ui_anc_rho_x")
ks_helper("ui_anc_alpha_x")
ks_helper("log_or_gamma")
ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
to_ks_df(x) %>%
group_by(method) %>%
summarise(ks = mean(ks), par = x)
}) %>%
bind_rows() %>%
pivot_wider(names_from = "method", values_from = "ks") %>%
rename(
"Parameter" = "par",
"KS(aghq, tmbstan)" = "aghq",
"KS(TMB, tmbstan)" = "TMB",
)
ks_summary %>%
gt::gt() %>%
gt::fmt_number(
columns = starts_with("KS"),
decimals = 3
)
| Parameter | KS(aghq, tmbstan) | KS(TMB, tmbstan) |
|---|---|---|
| beta_rho | 0.090 | 0.085 |
| beta_alpha | 0.101 | 0.103 |
| beta_lambda | 0.061 | 0.074 |
| beta_anc_rho | 0.105 | 0.090 |
| beta_anc_alpha | 0.044 | 0.025 |
| logit_phi_rho_x | 0.536 | 0.118 |
| log_sigma_rho_x | 0.646 | 0.195 |
| logit_phi_rho_xs | 0.510 | 0.111 |
| log_sigma_rho_xs | 0.613 | 0.203 |
| logit_phi_rho_a | 0.552 | 0.080 |
| log_sigma_rho_a | 0.585 | 0.113 |
| logit_phi_rho_as | 0.569 | 0.104 |
| log_sigma_rho_as | 0.589 | 0.134 |
| log_sigma_rho_xa | 0.669 | 0.224 |
| u_rho_x | 0.092 | 0.095 |
| us_rho_x | 0.062 | 0.062 |
| u_rho_xs | 0.117 | 0.121 |
| us_rho_xs | 0.037 | 0.044 |
| u_rho_a | 0.087 | 0.083 |
| u_rho_as | 0.084 | 0.076 |
| logit_phi_alpha_x | 0.531 | 0.107 |
| log_sigma_alpha_x | 0.557 | 0.143 |
| logit_phi_alpha_xs | 0.551 | 0.143 |
| log_sigma_alpha_xs | 0.540 | 0.159 |
| logit_phi_alpha_a | 0.525 | 0.083 |
| log_sigma_alpha_a | 0.541 | 0.111 |
| logit_phi_alpha_as | 0.516 | 0.081 |
| log_sigma_alpha_as | 0.514 | 0.098 |
| log_sigma_alpha_xa | 0.627 | 0.146 |
| u_alpha_x | 0.096 | 0.089 |
| us_alpha_x | 0.084 | 0.083 |
| u_alpha_xs | 0.101 | 0.096 |
| us_alpha_xs | 0.097 | 0.095 |
| u_alpha_a | 0.085 | 0.084 |
| u_alpha_as | 0.096 | 0.102 |
| u_alpha_xa | 0.069 | 0.059 |
| OmegaT_raw | 0.518 | 0.022 |
| log_betaT | 0.689 | 0.244 |
| log_sigma_lambda_x | 0.736 | 0.269 |
| ui_lambda_x | 0.158 | 0.160 |
| log_sigma_ancrho_x | 0.504 | 0.037 |
| log_sigma_ancalpha_x | 0.519 | 0.083 |
| ui_anc_rho_x | 0.055 | 0.057 |
| ui_anc_alpha_x | 0.063 | 0.065 |
| log_sigma_or_gamma | 0.524 | 0.036 |
| log_or_gamma | 0.059 | 0.060 |
ggplot(ks_summary, aes(x = `KS(TMB, tmbstan)`, y = `KS(aghq, tmbstan)`)) +
geom_point(alpha = 0.5) +
xlim(0, 1) +
ylim(0, 1) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
labs(x = "KS(TMB, tmbstan)", y = "KS(aghq, tmbstan)") +
theme_minimal()
ks_summary %>%
mutate(`KS difference` = `KS(TMB, tmbstan)` - `KS(aghq, tmbstan)`) %>%
ggplot(aes(x = `KS difference`)) +
geom_boxplot(width = 0.5) +
coord_flip() +
labs(x = "KS(TMB, tmbstan) - KS(aghq, tmbstan)") +
theme_minimal()
The following parameters have KS values greater than 0.5 in both dimensions.
(big_ks <- ks_summary %>%
filter(`KS(aghq, tmbstan)` + `KS(TMB, tmbstan)` > 0.5) %>%
pull(Parameter))
## [1] "logit_phi_rho_x" "log_sigma_rho_x" "logit_phi_rho_xs" "log_sigma_rho_xs" "logit_phi_rho_a" "log_sigma_rho_a" "logit_phi_rho_as"
## [8] "log_sigma_rho_as" "log_sigma_rho_xa" "logit_phi_alpha_x" "log_sigma_alpha_x" "logit_phi_alpha_xs" "log_sigma_alpha_xs" "logit_phi_alpha_a"
## [15] "log_sigma_alpha_a" "logit_phi_alpha_as" "log_sigma_alpha_as" "log_sigma_alpha_xa" "OmegaT_raw" "log_betaT" "log_sigma_lambda_x"
## [22] "log_sigma_ancrho_x" "log_sigma_ancalpha_x" "log_sigma_or_gamma"
Let’s look into this further by plotting the histograms:
lapply(big_ks, histogram_plot)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
#' To write!
Suppose we have two sets of samples from the posterior.
For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios.
We can extract the TMB objective function for the log-likelihood as follows:
tmb$fit$obj$fn()
## [1] NaN